#########################################################################################
##################################  SIMULATION 2   ######################################
#########################################################################################
#########################################################################################

# Run this simulation to generate data for tables and figures.

library(pacman)
p_load(tidyverse, ggplot2, msm, sandwich, here, xtable)
set.seed(12345)
rm(list = ls())

setwd('C:/Users/ganth/Dropbox/StrengthInNumbersReplicationPackage/replicable/simulations')

#########################################################################################

# set up accounting
F1<-c('F','M','M','M','M')
F3<-c('F','F','F','M','M')

gender <- c(rep(F1, 39), rep(F3, 36))
index <- 1:length(gender)
study <- 1
groupindex <- paste0("ACCT",round(ceiling(index/5), 0))
condition <- c(rep('F1', 39*5), rep('F3', 36*5))

df.main <- as.data.frame(cbind(index, groupindex, study, gender, condition))


# set up AH
F1<-c('F','F','M','M','M','M')
F3<-c('F','F','F','F','M','M')

gender <- c(rep(F1, 63), rep(F3, 60))
index <- 1:length(gender)
study <- 2
groupindex <- paste0("AH", round(ceiling(index/6), 0))
condition <- c(rep('F1', 63*6), rep('F3', 60*6))

df.main <- rbind(df.main, as.data.frame(cbind(index, groupindex, study, gender, condition)))
rm(study, condition, F1, F3, gender, groupindex, index)
# all set up



# function to start here
# note that these are taken from the selfpenalty detection program
load(file = "simulations_input_params.rda")
selfpnltyF1 <- modelInputsParams$`Male-majority groups`[modelInputsParams$Measure=="Women's implied self-penalty (in SD)"]
  selfpnltyF1 <- selfpnltyF1[!is.na(selfpnltyF1)]
selfpnltyF3 <- modelInputsParams$`Female-majority groups`[modelInputsParams$Measure=="Women's implied self-penalty (in SD)"]
  selfpnltyF3 <- selfpnltyF3[!is.na(selfpnltyF3)]

simulateEfx <- function(pnlty){
  library(dplyr)

out.matrix <- as.data.frame(matrix(ncol = 7, nrow = reps))
names(out.matrix) <- c('FvoteRatioBest1F', 'FvoteRatioBest3F',
                       'FSelfvoteRatioBest1F', 'FSelfvoteRatioBest3F', 'MSelfvoteRatioBest1F', 'MSelfvoteRatioBest3F', 'discrimSD')

# loop would start here
for (i in 1:reps) {
  df <- df.main
# pure performance
performance <- as.numeric(rnorm(nrow(df.main), mean = 0, sd = 1))
df$performance <- performance
df <- df %>% group_by(groupindex) %>% mutate(performanceRank = rank(performance))

# performance with penalty for women
df <- df %>% mutate(performancePenalized = case_when(gender=="F" ~ performance - pnlty,
                                                     gender=="M" ~ performance,
                                                     TRUE ~ 999))

df <- df %>% group_by(groupindex) %>% mutate(performancePenalizedRank = rank(performancePenalized))

df <- df %>% mutate(performancePenalizedForSelf = case_when(gender=="F" & condition == "F1" ~ performance - selfpnltyF1,
                                                            gender=="F" & condition == "F3 "~ performance - selfpnltyF3,
                                                     gender=="M" ~ performance,
                                                     TRUE ~ 999))
df <- df %>% group_by(groupindex) %>% mutate(performancePenalizedForSelfRank = rank(performancePenalizedForSelf))

bestWdiscrimByGroup <- df %>% filter((performancePenalizedRank==5 & study==1)|
                                       (performancePenalizedRank==6 & study==2)) %>%
                              select(groupindex, gender, index, performance) %>% rename(genderBestAfterPenalty = gender, indexBestAfterPenalty = index, performanceBestAfterPenalty = performance)

secondbestWdiscrimByGroup <- df %>% filter((performancePenalizedRank==4 & study==1)|
                                       (performancePenalizedRank==5 & study==2)) %>%
  select(groupindex, gender, performance) %>% rename(gendersecondBestAfterPenalty = gender, performanceSecondBestAfterPenalty = performance)


df <- merge(df, bestWdiscrimByGroup, by = 'groupindex', all.x = TRUE, all.y = TRUE)

df <- merge(df, secondbestWdiscrimByGroup, by = 'groupindex', all.x = TRUE, all.y = TRUE)

df <- df %>% mutate(genderVoteBest = case_when(gender == "M" ~ genderBestAfterPenalty,
                                           gender == "F" & index != indexBestAfterPenalty ~ genderBestAfterPenalty,
                                           gender == "F" & condition == "F1" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) > selfpnltyF1) ~ genderBestAfterPenalty,
                                           gender == "F" & condition == "F1" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) <= selfpnltyF1) ~ gendersecondBestAfterPenalty,
                                           gender == "F" & condition == "F3" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) > selfpnltyF3) ~ genderBestAfterPenalty,
                                           gender == "F" & condition == "F3" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) <= selfpnltyF3) ~ gendersecondBestAfterPenalty,
                                           TRUE ~ "999"),
                    selfVote = case_when(gender == "M" & index == indexBestAfterPenalty ~ 1,
                                         gender == "M" & index != indexBestAfterPenalty ~ 0,
                                         gender == "F" & index != indexBestAfterPenalty ~ 0,
                                         gender == "F" & condition == "F1" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) <= selfpnltyF1) ~ 0,
                                         gender == "F" & condition == "F1" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) > selfpnltyF1) ~ 1,
                                         gender == "F" & condition == "F3" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) <= selfpnltyF3) ~ 0,
                                         gender == "F" & condition == "F3" & index == indexBestAfterPenalty & ((performanceBestAfterPenalty - performanceSecondBestAfterPenalty) > selfpnltyF3) ~ 1,
                                          TRUE ~ 999)
                    )



df <- df %>% mutate(meanvotesFbest = case_when(
  condition == "F1" & study==1 ~ as.numeric(genderVoteBest=="F")/.2,
  condition == "F3" & study==1 ~ as.numeric(genderVoteBest=="F")/.6,
  condition == "F1" & study==2 ~ as.numeric(genderVoteBest=="F")/.33,
  condition == "F3" & study==2 ~ as.numeric(genderVoteBest=="F")/.66,
  TRUE ~ 999),
  meanvotesMbest = case_when(
    condition == "F1" & study == 1 ~ as.numeric(genderVoteBest=="M")/.8,
    condition == "F3" & study == 1 ~ as.numeric(genderVoteBest=="M")/.4,
    condition == "F1" & study == 2 ~ as.numeric(genderVoteBest=="M")/.66,
    condition == "F3" & study == 2 ~ as.numeric(genderVoteBest=="M")/.33,
    TRUE ~ 999))


df <- df %>% mutate(meanvotesSelf = case_when(
  study==1 ~ selfVote/(1/5),
  study==2 ~ selfVote/(1/6),
  TRUE ~ 999))

# construct measures
measures <- df %>% ungroup() %>% group_by(condition) %>% summarize(meanvotesFbest = mean(meanvotesFbest),
                                                     meanvotesMbest = mean(meanvotesMbest))

# construct measures
measuresSelfVote <- df %>% ungroup() %>% group_by(condition, gender) %>% summarize(meanvotesSelf = mean(meanvotesSelf))



out.matrix$FvoteRatioBest1F[i] <- measures$meanvotesFbest[1]
out.matrix$FvoteRatioBest3F[i] <- measures$meanvotesFbest[2]
out.matrix$MvoteRatioBest1F[i] <- measures$meanvotesMbest[1]
out.matrix$MvoteRatioBest3F[i] <- measures$meanvotesMbest[2]

out.matrix$FSelfvoteRatioBest1F[i] <- measuresSelfVote$meanvotesSelf[1]
out.matrix$FSelfvoteRatioBest3F[i] <- measuresSelfVote$meanvotesSelf[3]
out.matrix$MSelfvoteRatioBest1F[i] <- measuresSelfVote$meanvotesSelf[2]
out.matrix$MSelfvoteRatioBest3F[i] <- measuresSelfVote$meanvotesSelf[4]



print(paste(pnlty, "-",i))
}
out.matrix$discrimSD <- pnlty
return(out.matrix)
}


min <- -1
max <- 3
series <- seq(min, max, .01)

parallel <- TRUE
reps <- 1000

start <- Sys.time()
if (parallel == F) {
loopOut <- lapply(series, simulateEfx)
} else {
library(parallel)
no_cores <- detectCores()
cl <- makeCluster(no_cores)
clusterExport(cl, list("reps", "df.main", "selfpnltyF1", "selfpnltyF3"))
loopOut <- parLapply(cl, series, simulateEfx)
stopCluster(cl)
}

end <- Sys.time()
print(paste("runtime:", end - start))

loopOut.df <- do.call(rbind.data.frame, loopOut)
loopOut.df$`Performance penalty for women (SD)` <- loopOut.df$discrimSD

# capture linear models here on the microdata
femaleMajorityTrend <- lm(data = loopOut.df, FvoteRatioBest3F ~ `Performance penalty for women (SD)` +
                            I(`Performance penalty for women (SD)`^2) )
maleMajorityTrend <- lm(data = loopOut.df, FvoteRatioBest1F ~ `Performance penalty for women (SD)` +
                          I(`Performance penalty for women (SD)`^2))

femaleMajorityTrend_inverted <- lm(data = loopOut.df,  `Performance penalty for women (SD)` ~ FvoteRatioBest3F +
                                     I(FvoteRatioBest3F^2) )
maleMajorityTrend_inverted <- lm(data = loopOut.df, `Performance penalty for women (SD)` ~ FvoteRatioBest1F +
                                   I(FvoteRatioBest1F^2))


# now aggregate
loopOut.df.aggregated <- loopOut.df %>% select(-discrimSD) %>%
  group_by(`Performance penalty for women (SD)`) %>%
  summarise_all(list(mean, sd), na.rm = TRUE)

names<-names(loopOut.df.aggregated)
names<- gsub("_fn1","", names)
names<- gsub("_fn2","_sd", names)
names(loopOut.df.aggregated) <- names


loopOut.df.aggregated$FvoteRatioBest3F_ciUB <- loopOut.df.aggregated$FvoteRatioBest3F+1.96*loopOut.df.aggregated$FvoteRatioBest3F_sd
loopOut.df.aggregated$FvoteRatioBest3F_ciLB <- loopOut.df.aggregated$FvoteRatioBest3F-1.96*loopOut.df.aggregated$FvoteRatioBest3F_sd

loopOut.df.aggregated$FvoteRatioBest1F_ciUB <- loopOut.df.aggregated$FvoteRatioBest1F+1.96*loopOut.df.aggregated$FvoteRatioBest1F_sd
loopOut.df.aggregated$FvoteRatioBest1F_ciLB <- loopOut.df.aggregated$FvoteRatioBest1F-1.96*loopOut.df.aggregated$FvoteRatioBest1F_sd

save(loopOut.df, loopOut.df.aggregated, femaleMajorityTrend, maleMajorityTrend, femaleMajorityTrend_inverted, maleMajorityTrend_inverted,max,
     file = "simulations_output_2.Rda")
